{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 1. 处理缺失值",
   "id": "8492eae777eadecf"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 1.1 删除行演示",
   "id": "8994523834ee387f"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-12T06:07:08.791010Z",
     "start_time": "2025-09-12T06:07:08.759041Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "path = 'D:/2506A/monty03/day15/file/'\n",
    "\n",
    "df = pd.read_excel(path + '成绩.xlsx',index_col='序号')\n",
    "print(df)\n",
    "\n",
    "df.drop(7,inplace=True) # 删除小标是7的那一行\n",
    "print(df)\n",
    "\n",
    "df.drop([2,3],inplace=True)\n",
    "print(df)"
   ],
   "id": "6dce04b4e7c4508f",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     姓名    理论    机试    品德\n",
      "序号                       \n",
      "3   聂茹凤  70.0  32.0  15.0\n",
      "2   谭鑫宇   NaN  99.0   NaN\n",
      "1   韩耀祖  65.0   NaN  98.0\n",
      "5   刘千琪  75.0  36.0   NaN\n",
      "4   崔龙腾   NaN   NaN  44.0\n",
      "7   李欣桐  96.0  45.0   NaN\n",
      "6   李兆康  85.0   NaN  33.0\n",
      "     姓名    理论    机试    品德\n",
      "序号                       \n",
      "3   聂茹凤  70.0  32.0  15.0\n",
      "2   谭鑫宇   NaN  99.0   NaN\n",
      "1   韩耀祖  65.0   NaN  98.0\n",
      "5   刘千琪  75.0  36.0   NaN\n",
      "4   崔龙腾   NaN   NaN  44.0\n",
      "6   李兆康  85.0   NaN  33.0\n",
      "     姓名    理论    机试    品德\n",
      "序号                       \n",
      "1   韩耀祖  65.0   NaN  98.0\n",
      "5   刘千琪  75.0  36.0   NaN\n",
      "4   崔龙腾   NaN   NaN  44.0\n",
      "6   李兆康  85.0   NaN  33.0\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 1.2 删除列演示",
   "id": "fd996ff34465069"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-12T06:09:38.318111Z",
     "start_time": "2025-09-12T06:09:38.289520Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df = pd.read_excel(path + '成绩.xlsx',index_col='序号')\n",
    "print(df)\n",
    "# df.drop(labels='品德',inplace=True,axis=1)\n",
    "df.drop(labels=['机试','理论','品德'],inplace=True,axis=1)\n",
    "\n",
    "print(df)"
   ],
   "id": "6d4e722df18b0f6e",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     姓名    理论    机试    品德\n",
      "序号                       \n",
      "3   聂茹凤  70.0  32.0  15.0\n",
      "2   谭鑫宇   NaN  99.0   NaN\n",
      "1   韩耀祖  65.0   NaN  98.0\n",
      "5   刘千琪  75.0  36.0   NaN\n",
      "4   崔龙腾   NaN   NaN  44.0\n",
      "7   李欣桐  96.0  45.0   NaN\n",
      "6   李兆康  85.0   NaN  33.0\n",
      "     姓名\n",
      "序号     \n",
      "3   聂茹凤\n",
      "2   谭鑫宇\n",
      "1   韩耀祖\n",
      "5   刘千琪\n",
      "4   崔龙腾\n",
      "7   李欣桐\n",
      "6   李兆康\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 1.3 删除缺失值",
   "id": "4ebbee437e0ae9de"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-12T06:18:16.864900Z",
     "start_time": "2025-09-12T06:18:16.832927Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df = pd.read_excel(path + '成绩.xlsx',index_col='序号')\n",
    "print(df)\n",
    "\n",
    "# df.dropna(inplace=True) # 默认 any ,有一个缺失值就删除\n",
    "# 删除有缺失值的列\n",
    "# df.dropna(inplace=True,axis=1)\n",
    "# 全部为NAN则删除\n",
    "# df.dropna(inplace=True,axis=0,how='all')\n",
    "\n",
    "# 至少保留三个非控值\n",
    "# df.dropna(thresh=3,inplace=True)\n",
    "\n",
    "# 指定检查的列\n",
    "df.dropna(subset=['品德'],inplace=True)\n",
    "print(df)"
   ],
   "id": "6b4fb331724d17f2",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      姓名    理论    机试    品德\n",
      "序号                        \n",
      "3.0  聂茹凤  70.0  32.0  15.0\n",
      "2.0  谭鑫宇   NaN  99.0   NaN\n",
      "1.0  韩耀祖  65.0   NaN  98.0\n",
      "NaN  NaN   NaN   NaN   NaN\n",
      "5.0  刘千琪  75.0  36.0   NaN\n",
      "4.0  崔龙腾   NaN   NaN  44.0\n",
      "NaN  NaN   NaN   NaN   NaN\n",
      "7.0  李欣桐  96.0  45.0   NaN\n",
      "6.0  李兆康  85.0   NaN  33.0\n",
      "8.0  李伟聪   NaN   NaN   NaN\n",
      "      姓名    理论    机试    品德\n",
      "序号                        \n",
      "3.0  聂茹凤  70.0  32.0  15.0\n",
      "1.0  韩耀祖  65.0   NaN  98.0\n",
      "4.0  崔龙腾   NaN   NaN  44.0\n",
      "6.0  李兆康  85.0   NaN  33.0\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 1.4 填充缺失值",
   "id": "9b4b86099b2d0ecf"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-12T06:28:04.990216Z",
     "start_time": "2025-09-12T06:28:04.960953Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df = pd.read_excel(path + '成绩.xlsx',index_col='序号')\n",
    "# 删除全部是NAN的数据\n",
    "df.dropna(inplace=True,how='all')\n",
    "print(df)\n",
    "\n",
    "# 填充常数 0\n",
    "# df.fillna(0,inplace=True)\n",
    "# 按照列填充不同的\n",
    "# df.fillna(inplace=True,value={'理论':0.1,'机试':0.2,'品德':0.3})\n",
    "\n",
    "# 向前填充\n",
    "# df.ffill(inplace=True)\n",
    "\n",
    "# 向后填充\n",
    "# df.bfill(inplace=True)\n",
    "\n",
    "# 设置连续填充几个缺失值\n",
    "df.ffill(limit=1,inplace=True)\n",
    "\n",
    "print(df)"
   ],
   "id": "504d0a682a6c912",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      姓名    理论    机试    品德\n",
      "序号                        \n",
      "3.0  聂茹凤  70.0  32.0  15.0\n",
      "2.0  谭鑫宇   NaN  99.0   NaN\n",
      "1.0  韩耀祖   NaN   NaN  98.0\n",
      "5.0  刘千琪  75.0  36.0   NaN\n",
      "4.0  崔龙腾   NaN   NaN  44.0\n",
      "7.0  李欣桐  96.0  45.0   NaN\n",
      "6.0  李兆康  85.0   NaN  33.0\n",
      "8.0  李伟聪   NaN   NaN   NaN\n",
      "      姓名    理论    机试    品德\n",
      "序号                        \n",
      "3.0  聂茹凤  70.0  32.0  15.0\n",
      "2.0  谭鑫宇  70.0  99.0  15.0\n",
      "1.0  韩耀祖   NaN  99.0  98.0\n",
      "5.0  刘千琪  75.0  36.0  98.0\n",
      "4.0  崔龙腾  75.0  36.0  44.0\n",
      "7.0  李欣桐  96.0  45.0  44.0\n",
      "6.0  李兆康  85.0  45.0  33.0\n",
      "8.0  李伟聪  85.0   NaN  33.0\n"
     ]
    }
   ],
   "execution_count": 33
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
